{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 设置driver"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {},
   "outputs": [],
   "source": [
    "#准备工作\n",
    "import pandas as pd\n",
    "import time\n",
    "from random import random"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<ipython-input-72-8ae4025e7ff4>:19: DeprecationWarning: use options instead of chrome_options\n",
      "  driver = webdriver.Chrome( chrome_options = opts) #desired_capabilities=caps,\n"
     ]
    }
   ],
   "source": [
    "from selenium import webdriver\n",
    "from selenium.webdriver.common.by import By\n",
    "from selenium.webdriver.common.desired_capabilities import DesiredCapabilities\n",
    "\n",
    "\n",
    "#caps=dict()\n",
    "#caps[\"pageLoadStrategy\"] = \"none\"   # Do not wait for full page load\n",
    "\n",
    "# Chrome launch options for the scraping session\n",
    "opts = webdriver.ChromeOptions()\n",
    "opts.add_argument('--no-sandbox')  # works around the DevToolsActivePort-file-not-found start-up error\n",
    "opts.add_argument('--window-size=1920,3000')  # browser window size; Chrome expects WIDTH,HEIGHT separated by a comma\n",
    "opts.add_argument('--disable-gpu')  # Google's documentation mentions this flag as a bug workaround\n",
    "opts.add_argument('--hide-scrollbars')  # hide scrollbars, helps with some unusual pages\n",
    "#opts.add_argument('blink-settings=imagesEnabled=false')  # skip loading images for speed\n",
    "#opts.add_argument('--headless')  # run without a visible window; on Linux without a display, start-up fails unless this is enabled\n",
    "# opts.binary_location = \"C:\\portable\\PortableApps\\IronPortable\\App\\Iron\\chrome.exe\"\n",
    "# NOTE(review): binary_location takes the browser executable; the path below looks like a chromedriver path -- confirm before enabling.\n",
    "# opts.binary_location = \"C:\\Program Files\\Google\\Chrome\\Application\\chromedriver.exe\" #\"H:\\_coding_\\Gitee\\InternetNewMedia\\CapstonePrj2016\\chromedriver.exe\"  \n",
    "\n",
    "\n",
    "# Pass the options through the options keyword: chrome_options is deprecated and raises a DeprecationWarning.\n",
    "driver = webdriver.Chrome(options=opts)  # desired_capabilities=caps stays disabled"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 打开CNKI\n",
    "* 1.校园网，自动登录cnki.net\n",
    "* 2.校外网，需要登录fsso.cnki.net"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the CNKI home page in the browser session\n",
    "cnki_home_url = 'https://www.cnki.net/'\n",
    "driver.get(cnki_home_url)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 检查是否是中山大学南方学院登录（检查中山大学南方学院资源、保证后续可以下载）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'中山大学南方学院'"
      ]
     },
     "execution_count": 74,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the inner HTML of the element with id Ecp_loginShowName1 to verify which institution is logged in.\n",
    "# find_element(By.ID, ...) replaces find_element_by_id, which newer Selenium releases no longer provide.\n",
    "driver.find_element(By.ID, 'Ecp_loginShowName1').get_attribute('innerHTML')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 打开高级检索"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Click the element with id highSearch (the advanced-search entry point).\n",
    "# The innerHTML read that used to sit between lookup and click was discarded, so it is omitted.\n",
    "element = driver.find_element(By.ID, 'highSearch')\n",
    "element.click()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 切换窗口"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 检查窗口\n",
    "眼见不一定为实"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'CDwindow-6E7CF300A0C8FC55231D3AB1F6AB0092'"
      ]
     },
     "execution_count": 76,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Handle (ID) of the window the driver is currently focused on\n",
    "active_handle = driver.current_window_handle\n",
    "active_handle"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['CDwindow-6E7CF300A0C8FC55231D3AB1F6AB0092',\n",
       " 'CDwindow-C9B916AC89729DA5886C030FD7059349']"
      ]
     },
     "execution_count": 77,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Handles (IDs) of every window the driver currently knows about\n",
    "all_handles = driver.window_handles\n",
    "all_handles"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<ipython-input-78-2c997ac77236>:2: DeprecationWarning: use driver.switch_to.window instead\n",
      "  driver.switch_to_window(driver.window_handles[1])\n"
     ]
    }
   ],
   "source": [
    "# Switch focus to the second window handle.\n",
    "# switch_to.window is the supported call; switch_to_window is deprecated and emits a DeprecationWarning.\n",
    "driver.switch_to.window(driver.window_handles[1])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 选择“学术期刊”"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Click the tab addressed by the absolute XPath below.\n",
    "# NOTE(review): a positional XPath breaks whenever the page layout shifts -- consider an attribute- or text-based locator.\n",
    "element = driver.find_element(By.XPATH, '/html/body/div[3]/div[1]/div/ul[1]/li[1]/a/span')\n",
    "element.click()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 选择专业检索"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Click the element named majorSearch (the professional-search mode).\n",
    "# The discarded innerHTML read between lookup and click is omitted.\n",
    "element = driver.find_element(By.NAME, 'majorSearch')\n",
    "element.click()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 勾选SCI、CSSCI、CSCD、EI"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Tick the source-category checkbox whose key attribute is CSI.\n",
    "# NOTE(review): this cell was labelled sci, but the key CSI reads like CSSCI -- confirm on the page.\n",
    "element = driver.find_element(By.XPATH, '//input[@key=\"CSI\"]')\n",
    "if not element.is_selected():  # an unconditional click would untick the box when the cell is re-run\n",
    "    element.click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Tick the source-category checkbox whose key attribute is SI.\n",
    "# NOTE(review): this cell was labelled cssci, but the key SI reads like SCI -- confirm on the page.\n",
    "element = driver.find_element(By.XPATH, '//input[@key=\"SI\"]')\n",
    "if not element.is_selected():  # an unconditional click would untick the box when the cell is re-run\n",
    "    element.click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Tick the CSCD checkbox (key attribute CSD).\n",
    "element = driver.find_element(By.XPATH, '//input[@key=\"CSD\"]')\n",
    "if not element.is_selected():  # an unconditional click would untick the box when the cell is re-run\n",
    "    element.click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Tick the EI checkbox (key attribute EI).\n",
    "element = driver.find_element(By.XPATH, '//input[@key=\"EI\"]')\n",
    "if not element.is_selected():  # an unconditional click would untick the box when the cell is re-run\n",
    "    element.click()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 设置搜索query"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Search expression typed into the query box: two field conditions (SU, TI) joined by AND\n",
    "query = (\n",
    "    'SU = \"新媒体\"'\n",
    "    ' AND '\n",
    "    'TI =\"网络\" '\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Replace whatever is in the first textarea on the page with the query expression.\n",
    "# find_element(By.XPATH, ...) replaces find_element_by_xpath, which newer Selenium releases no longer provide.\n",
    "element = driver.find_element(By.XPATH, '//textarea')\n",
    "element.clear()\n",
    "element.send_keys(query)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Click the search button, matched by its value attribute.\n",
    "element = driver.find_element(By.XPATH, '//input[@value=\"检索\"]')\n",
    "element.click()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 检查检索文章总数量的信息"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'共找到<em>900</em>条结果'"
      ]
     },
     "execution_count": 88,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the result-count banner: inner HTML of the span with class pagerTitleCell.\n",
    "element = driver.find_element(By.XPATH, '//span[@class=\"pagerTitleCell\"]')\n",
    "element.get_attribute(\"innerHTML\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 更换页面文章数量（每页50条结果）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Click the icon-sort control; presumably this opens the results-per-page list -- confirm on the page.\n",
    "element = driver.find_element(By.XPATH, '//i[@class=\"icon icon-sort\"]')\n",
    "element.click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 90,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Choose the 50-results-per-page entry. click() returns None, so its result is not bound to a name.\n",
    "driver.find_element(By.XPATH, '//div[@id=\"perPageDiv\"]//li[@data-val=\"50\"]/a').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'1/18'"
      ]
     },
     "execution_count": 91,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Page indicator of the result list (current page / total pages), read from the countPageMark span.\n",
    "element = driver.find_element(By.XPATH, '//span[@class=\"countPageMark\"]')\n",
    "element.get_attribute('innerHTML')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 抓取页面信息"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>篇名</th>\n",
       "      <th>作者</th>\n",
       "      <th>刊名</th>\n",
       "      <th>发表时间</th>\n",
       "      <th>被引</th>\n",
       "      <th>下载</th>\n",
       "      <th>操作</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>“转发”行为的扩散与新媒体赋权——基于微博自闭症议题的社会网络分析</td>\n",
       "      <td>黄月琴; 黄宪成</td>\n",
       "      <td>新闻记者</td>\n",
       "      <td>2021-05-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>397</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>嬗变、冲突与重构：新媒体视域下的网络舆论</td>\n",
       "      <td>陈晓伟; 董烁</td>\n",
       "      <td>中国编辑</td>\n",
       "      <td>2021-05-10</td>\n",
       "      <td>NaN</td>\n",
       "      <td>226</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>新媒体时代图书网络营销矩阵建设实务研究</td>\n",
       "      <td>郑丽珠</td>\n",
       "      <td>出版广角</td>\n",
       "      <td>2021-04-30</td>\n",
       "      <td>NaN</td>\n",
       "      <td>57</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>嵌入基层治理：县级融媒体中心与基层网络政务服务的融合发展</td>\n",
       "      <td>谢新洲; 石林</td>\n",
       "      <td>传媒</td>\n",
       "      <td>2021-04-25</td>\n",
       "      <td>NaN</td>\n",
       "      <td>198</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>长城新媒体集团融合创新春晚形态——“河北网络春节云联欢”云端放异彩</td>\n",
       "      <td>李建; 田少华; 李遥</td>\n",
       "      <td>传媒</td>\n",
       "      <td>2021-04-25</td>\n",
       "      <td>NaN</td>\n",
       "      <td>18</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>6</td>\n",
       "      <td>网络时代红色资源在高校思政课中的应用</td>\n",
       "      <td>范小青</td>\n",
       "      <td>学校党建与思想教育</td>\n",
       "      <td>2021-03-23</td>\n",
       "      <td>NaN</td>\n",
       "      <td>485</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>7</td>\n",
       "      <td>现状与特征:社会网络分析在我国传播学研究中的应用</td>\n",
       "      <td>瞿旭晟; 赵鹏程</td>\n",
       "      <td>新闻爱好者</td>\n",
       "      <td>2021-03-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>318</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>8</td>\n",
       "      <td>从娱乐至死到网络至死:新媒体发展与城市生活状态</td>\n",
       "      <td>廖媌婧; 曾庆江</td>\n",
       "      <td>新闻爱好者</td>\n",
       "      <td>2021-03-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>244</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>9</td>\n",
       "      <td>信息化时代我国网络政治生态治理研究</td>\n",
       "      <td>许开轶</td>\n",
       "      <td>理论学刊</td>\n",
       "      <td>2021-03-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>82</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>10</td>\n",
       "      <td>出圈与折叠：2020年网络热点事件的舆论特征及对内容生产的意义</td>\n",
       "      <td>周葆华</td>\n",
       "      <td>新闻界</td>\n",
       "      <td>2021-03-08 13:06</td>\n",
       "      <td>NaN</td>\n",
       "      <td>762</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>11</td>\n",
       "      <td>关于高校网络意识形态安全建设的新考量</td>\n",
       "      <td>潘红涛</td>\n",
       "      <td>学校党建与思想教育</td>\n",
       "      <td>2021-03-08</td>\n",
       "      <td>1.0</td>\n",
       "      <td>362</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>12</td>\n",
       "      <td>提升高校社会主义核心价值观网络传播效果研究</td>\n",
       "      <td>高蕾; 魏楚元; 王洋</td>\n",
       "      <td>传媒</td>\n",
       "      <td>2021-02-25</td>\n",
       "      <td>NaN</td>\n",
       "      <td>113</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>13</td>\n",
       "      <td>网络时代民族团结教育研究——铸牢中华民族共同体意识研究系列论文之一</td>\n",
       "      <td>王卓</td>\n",
       "      <td>广西民族研究</td>\n",
       "      <td>2021-02-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>167</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>14</td>\n",
       "      <td>浅析广播电视新闻评论在网络媒体中的新常态运用</td>\n",
       "      <td>李节; 钟强</td>\n",
       "      <td>当代电视</td>\n",
       "      <td>2021-02-01</td>\n",
       "      <td>NaN</td>\n",
       "      <td>107</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>15</td>\n",
       "      <td>基于网络结构与内容分布的新媒体事件聚类研究</td>\n",
       "      <td>马昊; 马晓悦</td>\n",
       "      <td>现代情报</td>\n",
       "      <td>2021-02-01</td>\n",
       "      <td>NaN</td>\n",
       "      <td>152</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>16</td>\n",
       "      <td>新媒体时代公众参与网络信息治理的实现路径</td>\n",
       "      <td>魏小雨</td>\n",
       "      <td>新闻爱好者</td>\n",
       "      <td>2021-01-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>103</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>17</td>\n",
       "      <td>提升高校网络育人成效的路径研究</td>\n",
       "      <td>丰硕</td>\n",
       "      <td>学校党建与思想教育</td>\n",
       "      <td>2021-01-18</td>\n",
       "      <td>NaN</td>\n",
       "      <td>487</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>18</td>\n",
       "      <td>媒介素养研究核心议题:基于CSSCI期刊关键词网络分析</td>\n",
       "      <td>罗雁飞</td>\n",
       "      <td>中国出版</td>\n",
       "      <td>2021-01-16</td>\n",
       "      <td>NaN</td>\n",
       "      <td>524</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>19</td>\n",
       "      <td>2020年网络新媒体传播:重大现实主题与学科研究进展</td>\n",
       "      <td>孟威</td>\n",
       "      <td>当代传播</td>\n",
       "      <td>2021-01-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>459</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>20</td>\n",
       "      <td>网络青年亚文化的特征及引领路径探析</td>\n",
       "      <td>谌韵灵; 邹升平</td>\n",
       "      <td>南通大学学报(社会科学版)</td>\n",
       "      <td>2021-01-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>860</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>21</td>\n",
       "      <td>网络的法律地位:行政确认与《民法典》法律界定</td>\n",
       "      <td>陆小华</td>\n",
       "      <td>山西大学学报(哲学社会科学版)</td>\n",
       "      <td>2021-01-15</td>\n",
       "      <td>2.0</td>\n",
       "      <td>153</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>22</td>\n",
       "      <td>构建网络内容治理主体协同机制的作用与优化路径</td>\n",
       "      <td>谢新洲; 宋琢</td>\n",
       "      <td>新闻与写作</td>\n",
       "      <td>2021-01-05</td>\n",
       "      <td>1.0</td>\n",
       "      <td>373</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>23</td>\n",
       "      <td>大学生网络思想政治教育的范式演进与经验启示</td>\n",
       "      <td>梁钦; 蒲清平; 肖国芳</td>\n",
       "      <td>思想政治教育研究</td>\n",
       "      <td>2020-12-20</td>\n",
       "      <td>2.0</td>\n",
       "      <td>755</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>24</td>\n",
       "      <td>基于舆论引导的网络新闻传播规划——评《新闻传播学热点专题:知识图谱》</td>\n",
       "      <td>柳太江</td>\n",
       "      <td>中国油脂</td>\n",
       "      <td>2020-12-16</td>\n",
       "      <td>1.0</td>\n",
       "      <td>228</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>25</td>\n",
       "      <td>新媒体产业资本流通与价值转移的影响机制研究——以网络视听行业为例</td>\n",
       "      <td>王建磊</td>\n",
       "      <td>新闻大学</td>\n",
       "      <td>2020-12-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>431</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>26</td>\n",
       "      <td>青年大学生参与网络争议的态度、归因与表现特征——基于《后浪》争议的新媒体时代探究</td>\n",
       "      <td>玄铮</td>\n",
       "      <td>中国青年研究</td>\n",
       "      <td>2020-12-05</td>\n",
       "      <td>1.0</td>\n",
       "      <td>731</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>27</td>\n",
       "      <td>新时代网络舆论生态治理的内在逻辑及实践指向</td>\n",
       "      <td>肖唤元; 郑晶晶</td>\n",
       "      <td>思想教育研究</td>\n",
       "      <td>2020-11-25</td>\n",
       "      <td>NaN</td>\n",
       "      <td>629</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>28</td>\n",
       "      <td>5G+直播:探索网络媒体“新闻+政务服务商务”的运营模式</td>\n",
       "      <td>杨谷</td>\n",
       "      <td>传媒</td>\n",
       "      <td>2020-11-25</td>\n",
       "      <td>NaN</td>\n",
       "      <td>281</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>29</td>\n",
       "      <td>重大疫情应对中的网络次生舆情治理探析</td>\n",
       "      <td>马翔飞; 阮一帆</td>\n",
       "      <td>学校党建与思想教育</td>\n",
       "      <td>2020-11-23</td>\n",
       "      <td>NaN</td>\n",
       "      <td>421</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>30</td>\n",
       "      <td>“青年抗疫守卫队”的媒介实践与社会行动——对一个湖北村庄的网络民族志研究</td>\n",
       "      <td>周孟杰; 吴玮</td>\n",
       "      <td>当代青年研究</td>\n",
       "      <td>2020-11-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>359</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>31</td>\n",
       "      <td>网络时代媒体影响力:媒体效果研究视角</td>\n",
       "      <td>李喜根</td>\n",
       "      <td>全球传媒学刊</td>\n",
       "      <td>2020-11-18 11:41</td>\n",
       "      <td>NaN</td>\n",
       "      <td>780</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>32</td>\n",
       "      <td>新媒体背景下的高校网络思想政治教育工作：价值、挑战与应对</td>\n",
       "      <td>胡逢源</td>\n",
       "      <td>国家教育行政学院学报</td>\n",
       "      <td>2020-11-15</td>\n",
       "      <td>1.0</td>\n",
       "      <td>401</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>33</td>\n",
       "      <td>社交媒体环境下网络舆论引导策略研究</td>\n",
       "      <td>刘锦宏; 张永薇</td>\n",
       "      <td>出版广角</td>\n",
       "      <td>2020-11-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>700</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>34</td>\n",
       "      <td>重大疫情防控中的网络舆情及其信息治理策略——基于“弹簧”动力模型分析</td>\n",
       "      <td>吕朝辉; 程子恒</td>\n",
       "      <td>情报杂志</td>\n",
       "      <td>2020-11-12 15:15</td>\n",
       "      <td>1.0</td>\n",
       "      <td>911</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>35</td>\n",
       "      <td>新媒体环境下基于动机理论的高校负面网络舆情传播研究</td>\n",
       "      <td>陈晓燕; 何有世</td>\n",
       "      <td>高校教育管理</td>\n",
       "      <td>2020-11-09 15:14</td>\n",
       "      <td>1.0</td>\n",
       "      <td>710</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>36</td>\n",
       "      <td>话语结构、思维演进与智能化转向:作为政治新图景的中国网络问政</td>\n",
       "      <td>陈刚; 王卿</td>\n",
       "      <td>新闻与传播评论</td>\n",
       "      <td>2020-11-03</td>\n",
       "      <td>NaN</td>\n",
       "      <td>424</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>37</td>\n",
       "      <td>网络语境下媒介使用对用户行为表征的影响研究</td>\n",
       "      <td>阳长征</td>\n",
       "      <td>新闻与传播评论</td>\n",
       "      <td>2020-11-03</td>\n",
       "      <td>NaN</td>\n",
       "      <td>330</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>38</td>\n",
       "      <td>新媒体时代主体间性的建构——以“豆瓣”的网络乌托邦为例</td>\n",
       "      <td>唐海龙</td>\n",
       "      <td>文艺争鸣</td>\n",
       "      <td>2020-10-25</td>\n",
       "      <td>NaN</td>\n",
       "      <td>389</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>39</td>\n",
       "      <td>新媒体时代网络文学改编剧的叙事与传播转向——以IP网剧《长安十二时辰》为例</td>\n",
       "      <td>杨雅捷; 朱殿勇</td>\n",
       "      <td>新闻爱好者</td>\n",
       "      <td>2020-10-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>547</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>40</td>\n",
       "      <td>新媒体背景下政府网络舆情治理能力提升路径探析</td>\n",
       "      <td>普霞; 李培林</td>\n",
       "      <td>新闻爱好者</td>\n",
       "      <td>2020-10-20</td>\n",
       "      <td>1.0</td>\n",
       "      <td>531</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40</th>\n",
       "      <td>41</td>\n",
       "      <td>北京文化形象的媒体呈现——基于大数据和社会网络分析方法</td>\n",
       "      <td>宋凯</td>\n",
       "      <td>现代传播(中国传媒大学学报)</td>\n",
       "      <td>2020-10-15</td>\n",
       "      <td>1.0</td>\n",
       "      <td>833</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>41</th>\n",
       "      <td>42</td>\n",
       "      <td>技术与可视化:网络与新媒体专业人才培养的新取向</td>\n",
       "      <td>王秀丽</td>\n",
       "      <td>传媒</td>\n",
       "      <td>2020-10-10</td>\n",
       "      <td>NaN</td>\n",
       "      <td>134</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42</th>\n",
       "      <td>43</td>\n",
       "      <td>网络直播的内容生产逻辑及优化策略</td>\n",
       "      <td>张科</td>\n",
       "      <td>中国编辑</td>\n",
       "      <td>2020-10-10</td>\n",
       "      <td>1.0</td>\n",
       "      <td>445</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43</th>\n",
       "      <td>44</td>\n",
       "      <td>重大突发公共卫生事件中主流媒体与网络舆情有机运动关系探讨——以新冠肺炎疫情信息传播为例</td>\n",
       "      <td>马缘园</td>\n",
       "      <td>新闻爱好者</td>\n",
       "      <td>2020-09-20</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1054</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44</th>\n",
       "      <td>45</td>\n",
       "      <td>网络直播在学术期刊中的应用探索</td>\n",
       "      <td>丁合; 张雷</td>\n",
       "      <td>科技与出版</td>\n",
       "      <td>2020-09-16 15:11</td>\n",
       "      <td>1.0</td>\n",
       "      <td>342</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45</th>\n",
       "      <td>46</td>\n",
       "      <td>新媒体环境下网络舆情引导的现实挑战和实践进路</td>\n",
       "      <td>王妍; 胡箫寒</td>\n",
       "      <td>艺术百家</td>\n",
       "      <td>2020-09-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>140</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46</th>\n",
       "      <td>47</td>\n",
       "      <td>二元性互构：选择性接触影响下的青年网络政治意见表达</td>\n",
       "      <td>晏齐宏</td>\n",
       "      <td>新闻大学</td>\n",
       "      <td>2020-09-15</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1243</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>47</th>\n",
       "      <td>48</td>\n",
       "      <td>网络泛娱乐化:青年主流意识形态的“遮蔽”及其“解蔽”</td>\n",
       "      <td>杨章文</td>\n",
       "      <td>探索</td>\n",
       "      <td>2020-09-15</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1946</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48</th>\n",
       "      <td>49</td>\n",
       "      <td>学术期刊网络舆情危机与对策</td>\n",
       "      <td>张小强; 刘文斌</td>\n",
       "      <td>中国科技期刊研究</td>\n",
       "      <td>2020-09-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>104</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49</th>\n",
       "      <td>50</td>\n",
       "      <td>治理视域下的高校网络舆情应对策略</td>\n",
       "      <td>王楠; 王保华</td>\n",
       "      <td>思想理论教育</td>\n",
       "      <td>2020-09-10</td>\n",
       "      <td>3.0</td>\n",
       "      <td>665</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    Unnamed: 0                                           篇名            作者  \\\n",
       "0            1            “转发”行为的扩散与新媒体赋权——基于微博自闭症议题的社会网络分析      黄月琴; 黄宪成   \n",
       "1            2                         嬗变、冲突与重构：新媒体视域下的网络舆论       陈晓伟; 董烁   \n",
       "2            3                          新媒体时代图书网络营销矩阵建设实务研究           郑丽珠   \n",
       "3            4                 嵌入基层治理：县级融媒体中心与基层网络政务服务的融合发展       谢新洲; 石林   \n",
       "4            5            长城新媒体集团融合创新春晚形态——“河北网络春节云联欢”云端放异彩   李建; 田少华; 李遥   \n",
       "5            6                           网络时代红色资源在高校思政课中的应用           范小青   \n",
       "6            7                     现状与特征:社会网络分析在我国传播学研究中的应用      瞿旭晟; 赵鹏程   \n",
       "7            8                      从娱乐至死到网络至死:新媒体发展与城市生活状态      廖媌婧; 曾庆江   \n",
       "8            9                            信息化时代我国网络政治生态治理研究           许开轶   \n",
       "9           10              出圈与折叠：2020年网络热点事件的舆论特征及对内容生产的意义           周葆华   \n",
       "10          11                           关于高校网络意识形态安全建设的新考量           潘红涛   \n",
       "11          12                        提升高校社会主义核心价值观网络传播效果研究   高蕾; 魏楚元; 王洋   \n",
       "12          13            网络时代民族团结教育研究——铸牢中华民族共同体意识研究系列论文之一            王卓   \n",
       "13          14                       浅析广播电视新闻评论在网络媒体中的新常态运用        李节; 钟强   \n",
       "14          15                        基于网络结构与内容分布的新媒体事件聚类研究       马昊; 马晓悦   \n",
       "15          16                         新媒体时代公众参与网络信息治理的实现路径           魏小雨   \n",
       "16          17                              提升高校网络育人成效的路径研究            丰硕   \n",
       "17          18                  媒介素养研究核心议题:基于CSSCI期刊关键词网络分析           罗雁飞   \n",
       "18          19                   2020年网络新媒体传播:重大现实主题与学科研究进展            孟威   \n",
       "19          20                            网络青年亚文化的特征及引领路径探析      谌韵灵; 邹升平   \n",
       "20          21                       网络的法律地位:行政确认与《民法典》法律界定           陆小华   \n",
       "21          22                       构建网络内容治理主体协同机制的作用与优化路径       谢新洲; 宋琢   \n",
       "22          23                        大学生网络思想政治教育的范式演进与经验启示  梁钦; 蒲清平; 肖国芳   \n",
       "23          24           基于舆论引导的网络新闻传播规划——评《新闻传播学热点专题:知识图谱》           柳太江   \n",
       "24          25             新媒体产业资本流通与价值转移的影响机制研究——以网络视听行业为例           王建磊   \n",
       "25          26     青年大学生参与网络争议的态度、归因与表现特征——基于《后浪》争议的新媒体时代探究            玄铮   \n",
       "26          27                        新时代网络舆论生态治理的内在逻辑及实践指向      肖唤元; 郑晶晶   \n",
       "27          28                 5G+直播:探索网络媒体“新闻+政务服务商务”的运营模式            杨谷   \n",
       "28          29                           重大疫情应对中的网络次生舆情治理探析      马翔飞; 阮一帆   \n",
       "29          30         “青年抗疫守卫队”的媒介实践与社会行动——对一个湖北村庄的网络民族志研究       周孟杰; 吴玮   \n",
       "30          31                           网络时代媒体影响力:媒体效果研究视角           李喜根   \n",
       "31          32                 新媒体背景下的高校网络思想政治教育工作：价值、挑战与应对           胡逢源   \n",
       "32          33                            社交媒体环境下网络舆论引导策略研究      刘锦宏; 张永薇   \n",
       "33          34           重大疫情防控中的网络舆情及其信息治理策略——基于“弹簧”动力模型分析      吕朝辉; 程子恒   \n",
       "34          35                    新媒体环境下基于动机理论的高校负面网络舆情传播研究      陈晓燕; 何有世   \n",
       "35          36               话语结构、思维演进与智能化转向:作为政治新图景的中国网络问政        陈刚; 王卿   \n",
       "36          37                        网络语境下媒介使用对用户行为表征的影响研究           阳长征   \n",
       "37          38                  新媒体时代主体间性的建构——以“豆瓣”的网络乌托邦为例           唐海龙   \n",
       "38          39        新媒体时代网络文学改编剧的叙事与传播转向——以IP网剧《长安十二时辰》为例      杨雅捷; 朱殿勇   \n",
       "39          40                       新媒体背景下政府网络舆情治理能力提升路径探析       普霞; 李培林   \n",
       "40          41                  北京文化形象的媒体呈现——基于大数据和社会网络分析方法            宋凯   \n",
       "41          42                      技术与可视化:网络与新媒体专业人才培养的新取向           王秀丽   \n",
       "42          43                             网络直播的内容生产逻辑及优化策略            张科   \n",
       "43          44  重大突发公共卫生事件中主流媒体与网络舆情有机运动关系探讨——以新冠肺炎疫情信息传播为例           马缘园   \n",
       "44          45                              网络直播在学术期刊中的应用探索        丁合; 张雷   \n",
       "45          46                       新媒体环境下网络舆情引导的现实挑战和实践进路       王妍; 胡箫寒   \n",
       "46          47                    二元性互构：选择性接触影响下的青年网络政治意见表达           晏齐宏   \n",
       "47          48                   网络泛娱乐化:青年主流意识形态的“遮蔽”及其“解蔽”           杨章文   \n",
       "48          49                                学术期刊网络舆情危机与对策      张小强; 刘文斌   \n",
       "49          50                             治理视域下的高校网络舆情应对策略       王楠; 王保华   \n",
       "\n",
       "                 刊名              发表时间   被引    下载  操作  \n",
       "0              新闻记者        2021-05-20  NaN   397  下载  \n",
       "1              中国编辑        2021-05-10  NaN   226  下载  \n",
       "2              出版广角        2021-04-30  NaN    57  下载  \n",
       "3                传媒        2021-04-25  NaN   198  下载  \n",
       "4                传媒        2021-04-25  NaN    18  下载  \n",
       "5         学校党建与思想教育        2021-03-23  NaN   485  下载  \n",
       "6             新闻爱好者        2021-03-20  NaN   318  下载  \n",
       "7             新闻爱好者        2021-03-20  NaN   244  下载  \n",
       "8              理论学刊        2021-03-15  NaN    82  下载  \n",
       "9               新闻界  2021-03-08 13:06  NaN   762  下载  \n",
       "10        学校党建与思想教育        2021-03-08  1.0   362  下载  \n",
       "11               传媒        2021-02-25  NaN   113  下载  \n",
       "12           广西民族研究        2021-02-20  NaN   167  下载  \n",
       "13             当代电视        2021-02-01  NaN   107  下载  \n",
       "14             现代情报        2021-02-01  NaN   152  下载  \n",
       "15            新闻爱好者        2021-01-20  NaN   103  下载  \n",
       "16        学校党建与思想教育        2021-01-18  NaN   487  下载  \n",
       "17             中国出版        2021-01-16  NaN   524  下载  \n",
       "18             当代传播        2021-01-15  NaN   459  下载  \n",
       "19    南通大学学报(社会科学版)        2021-01-15  NaN   860  下载  \n",
       "20  山西大学学报(哲学社会科学版)        2021-01-15  2.0   153  下载  \n",
       "21            新闻与写作        2021-01-05  1.0   373  下载  \n",
       "22         思想政治教育研究        2020-12-20  2.0   755  下载  \n",
       "23             中国油脂        2020-12-16  1.0   228  下载  \n",
       "24             新闻大学        2020-12-15  NaN   431  下载  \n",
       "25           中国青年研究        2020-12-05  1.0   731  下载  \n",
       "26           思想教育研究        2020-11-25  NaN   629  下载  \n",
       "27               传媒        2020-11-25  NaN   281  下载  \n",
       "28        学校党建与思想教育        2020-11-23  NaN   421  下载  \n",
       "29           当代青年研究        2020-11-20  NaN   359  下载  \n",
       "30           全球传媒学刊  2020-11-18 11:41  NaN   780  下载  \n",
       "31       国家教育行政学院学报        2020-11-15  1.0   401  下载  \n",
       "32             出版广角        2020-11-15  NaN   700  下载  \n",
       "33             情报杂志  2020-11-12 15:15  1.0   911  下载  \n",
       "34           高校教育管理  2020-11-09 15:14  1.0   710  下载  \n",
       "35          新闻与传播评论        2020-11-03  NaN   424  下载  \n",
       "36          新闻与传播评论        2020-11-03  NaN   330  下载  \n",
       "37             文艺争鸣        2020-10-25  NaN   389  下载  \n",
       "38            新闻爱好者        2020-10-20  NaN   547  下载  \n",
       "39            新闻爱好者        2020-10-20  1.0   531  下载  \n",
       "40   现代传播(中国传媒大学学报)        2020-10-15  1.0   833  下载  \n",
       "41               传媒        2020-10-10  NaN   134  下载  \n",
       "42             中国编辑        2020-10-10  1.0   445  下载  \n",
       "43            新闻爱好者        2020-09-20  1.0  1054  下载  \n",
       "44            科技与出版  2020-09-16 15:11  1.0   342  下载  \n",
       "45             艺术百家        2020-09-15  NaN   140  下载  \n",
       "46             新闻大学        2020-09-15  1.0  1243  下载  \n",
       "47               探索        2020-09-15  2.0  1946  下载  \n",
       "48         中国科技期刊研究        2020-09-15  NaN   104  下载  \n",
       "49           思想理论教育        2020-09-10  3.0   665  下载  "
      ]
     },
     "execution_count": 92,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Scrape the result table shown on the first page.\n",
    "# find_element(\"id\", ...) replaces find_element_by_id, which Selenium 4.3 removed.\n",
    "element = driver.find_element(\"id\", \"gridTable\")\n",
    "含有页面主要数据的表格_HTML = element.get_attribute(\"innerHTML\")\n",
    "# read_html returns one DataFrame per <table> found; the first one is used as the result list.\n",
    "数据 = pd.read_html(含有页面主要数据的表格_HTML)[0]\n",
    "数据"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 翻页"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'下一页'"
      ]
     },
     "execution_count": 93,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Look up the pager link and display its innerHTML as a quick check.\n",
    "# find_element(\"id\", ...) replaces find_element_by_id, which Selenium 4.3 removed.\n",
    "element = driver.find_element(\"id\", \"PageNext\")\n",
    "element.get_attribute(\"innerHTML\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Notebook-level state for the paging step; 表格_html maps a page label to raw table HTML.\n",
    "表格_html = {}\n",
    "main_content = str()\n",
    "element = None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]\n"
     ]
    }
   ],
   "source": [
    "# Page labels to iterate over (1 through 17).\n",
    "pages = [*range(1, 18)]\n",
    "print(pages)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "metadata": {},
   "outputs": [],
   "source": [
    "def process_pages(pages):\n",
    "    \"\"\"Click \"next page\" once per entry in ``pages`` and store each resulting table.\n",
    "\n",
    "    For every label ``p`` the element with id ``PageNext`` is clicked, the loop\n",
    "    pauses 20-30 s, and the ``innerHTML`` of the ``gridTable`` element is saved\n",
    "    in the notebook-level dict ``表格_html`` under key ``p``.\n",
    "\n",
    "    The snapshot is taken *after* the click, so ``表格_html[p]`` holds the page\n",
    "    that follows the one displayed when iteration ``p`` started.\n",
    "\n",
    "    Args:\n",
    "        pages: iterable of hashable labels, one click per label.\n",
    "\n",
    "    Returns:\n",
    "        None. Results are written into the global ``表格_html``.\n",
    "    \"\"\"\n",
    "    for p in pages:\n",
    "        print(p, end='\\t')\n",
    "        # find_element(\"id\", ...) is the generic locator call; the\n",
    "        # find_element_by_* helpers were removed in Selenium 4.3.\n",
    "        driver.find_element(\"id\", \"PageNext\").click()\n",
    "        # Randomised pause; presumably gives the page time to load and spaces out requests.\n",
    "        time.sleep(20 + 10 * random())\n",
    "\n",
    "        # Keep the raw HTML of the element that contains the page's main table.\n",
    "        grid = driver.find_element(\"id\", \"gridTable\")\n",
    "        表格_html[p] = grid.get_attribute('innerHTML')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 97,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1\t2\t3\t4\t5\t6\t7\t8\t9\t10\t11\t12\t13\t14\t15\t16\t17\t"
     ]
    }
   ],
   "source": [
    "# Run the paging loop; every iteration sleeps 20-30 s, so this cell takes several minutes.\n",
    "process_pages(pages)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 98,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>html_snippets</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                        html_snippets\n",
       "1   \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "2   \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "3   \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "4   \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "5   \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "6   \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "7   \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "8   \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "9   \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "10  \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "11  \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "12  \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "13  \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "14  \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "15  \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "16  \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "17  \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ..."
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# One row per stored page label, one column holding that page's raw table HTML.\n",
    "df = pd.DataFrame([表格_html]).T.rename(columns={0: \"html_snippets\"})\n",
    "display(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Site label interpolated into output file names.\n",
    "网站 = \"中国知网\"\n",
    "# Output path templates; \"{网站}\" is a str.format placeholder.\n",
    "fn = {\n",
    "    \"output\": {\n",
    "        \"htm_snippets\": \"data/htm_snippets_{网站}.tsv\",\n",
    "    },\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 100,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "\n",
    "filename = fn[\"output\"][\"htm_snippets\"]\n",
    "# Fill in the \"{网站}\" placeholder, then make sure the target folder exists:\n",
    "# to_csv does not create missing directories and would raise otherwise.\n",
    "snippets_path = Path(filename.format(网站=网站))\n",
    "snippets_path.parent.mkdir(parents=True, exist_ok=True)\n",
    "df.to_csv(snippets_path, sep=\"\\t\", encoding=\"utf8\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Parse every stored HTML snippet into a DataFrame, in the order given by pages.\n",
    "l_df = list()\n",
    "for p in pages:\n",
    "    表格 = pd.read_html(表格_html[p])[0]\n",
    "    l_df += [表格]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 102,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>篇名</th>\n",
       "      <th>作者</th>\n",
       "      <th>刊名</th>\n",
       "      <th>发表时间</th>\n",
       "      <th>被引</th>\n",
       "      <th>下载</th>\n",
       "      <th>操作</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>“转发”行为的扩散与新媒体赋权——基于微博自闭症议题的社会网络分析</td>\n",
       "      <td>黄月琴; 黄宪成</td>\n",
       "      <td>新闻记者</td>\n",
       "      <td>2021-05-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>397</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>嬗变、冲突与重构：新媒体视域下的网络舆论</td>\n",
       "      <td>陈晓伟; 董烁</td>\n",
       "      <td>中国编辑</td>\n",
       "      <td>2021-05-10</td>\n",
       "      <td>NaN</td>\n",
       "      <td>226</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>新媒体时代图书网络营销矩阵建设实务研究</td>\n",
       "      <td>郑丽珠</td>\n",
       "      <td>出版广角</td>\n",
       "      <td>2021-04-30</td>\n",
       "      <td>NaN</td>\n",
       "      <td>57</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>嵌入基层治理：县级融媒体中心与基层网络政务服务的融合发展</td>\n",
       "      <td>谢新洲; 石林</td>\n",
       "      <td>传媒</td>\n",
       "      <td>2021-04-25</td>\n",
       "      <td>NaN</td>\n",
       "      <td>198</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>长城新媒体集团融合创新春晚形态——“河北网络春节云联欢”云端放异彩</td>\n",
       "      <td>李建; 田少华; 李遥</td>\n",
       "      <td>传媒</td>\n",
       "      <td>2021-04-25</td>\n",
       "      <td>NaN</td>\n",
       "      <td>18</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>845</th>\n",
       "      <td>896</td>\n",
       "      <td>新世纪的网络传播发展——“新世纪网络传播发展国际论坛”研讨会综述</td>\n",
       "      <td>王蕾</td>\n",
       "      <td>新闻记者</td>\n",
       "      <td>2001-06-05</td>\n",
       "      <td>NaN</td>\n",
       "      <td>132</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>846</th>\n",
       "      <td>897</td>\n",
       "      <td>论网络广播──网络广播现状和经营理念</td>\n",
       "      <td>杨叶青</td>\n",
       "      <td>现代传播-北京广播学院学报</td>\n",
       "      <td>2000-12-15</td>\n",
       "      <td>13.0</td>\n",
       "      <td>344</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>847</th>\n",
       "      <td>898</td>\n",
       "      <td>发展新媒体的若干思考——由世界网络所想到的</td>\n",
       "      <td>林涛</td>\n",
       "      <td>中国广播电视学刊</td>\n",
       "      <td>2000-11-25</td>\n",
       "      <td>1.0</td>\n",
       "      <td>76</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>848</th>\n",
       "      <td>899</td>\n",
       "      <td>网络时代的对话与交流——新媒体技术2000年报告会内容纪要</td>\n",
       "      <td>钟新</td>\n",
       "      <td>国际新闻界</td>\n",
       "      <td>2000-09-25</td>\n",
       "      <td>NaN</td>\n",
       "      <td>235</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>849</th>\n",
       "      <td>900</td>\n",
       "      <td>寻求机遇 再造辉煌——网络传播时代中国电视业的生存及发展探析</td>\n",
       "      <td>程莉</td>\n",
       "      <td>中国广播电视学刊</td>\n",
       "      <td>2000-07-25</td>\n",
       "      <td>1.0</td>\n",
       "      <td>52</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>900 rows × 8 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     Unnamed: 0                                 篇名           作者  \\\n",
       "0             1  “转发”行为的扩散与新媒体赋权——基于微博自闭症议题的社会网络分析     黄月琴; 黄宪成   \n",
       "1             2               嬗变、冲突与重构：新媒体视域下的网络舆论      陈晓伟; 董烁   \n",
       "2             3                新媒体时代图书网络营销矩阵建设实务研究          郑丽珠   \n",
       "3             4       嵌入基层治理：县级融媒体中心与基层网络政务服务的融合发展      谢新洲; 石林   \n",
       "4             5  长城新媒体集团融合创新春晚形态——“河北网络春节云联欢”云端放异彩  李建; 田少华; 李遥   \n",
       "..          ...                                ...          ...   \n",
       "845         896   新世纪的网络传播发展——“新世纪网络传播发展国际论坛”研讨会综述           王蕾   \n",
       "846         897                 论网络广播──网络广播现状和经营理念          杨叶青   \n",
       "847         898              发展新媒体的若干思考——由世界网络所想到的           林涛   \n",
       "848         899      网络时代的对话与交流——新媒体技术2000年报告会内容纪要           钟新   \n",
       "849         900     寻求机遇 再造辉煌——网络传播时代中国电视业的生存及发展探析           程莉   \n",
       "\n",
       "                刊名        发表时间    被引   下载  操作  \n",
       "0             新闻记者  2021-05-20   NaN  397  下载  \n",
       "1             中国编辑  2021-05-10   NaN  226  下载  \n",
       "2             出版广角  2021-04-30   NaN   57  下载  \n",
       "3               传媒  2021-04-25   NaN  198  下载  \n",
       "4               传媒  2021-04-25   NaN   18  下载  \n",
       "..             ...         ...   ...  ...  ..  \n",
       "845           新闻记者  2001-06-05   NaN  132  下载  \n",
       "846  现代传播-北京广播学院学报  2000-12-15  13.0  344  下载  \n",
       "847       中国广播电视学刊  2000-11-25   1.0   76  下载  \n",
       "848          国际新闻界  2000-09-25   NaN  235  下载  \n",
       "849       中国广播电视学刊  2000-07-25   1.0   52  下载  \n",
       "\n",
       "[900 rows x 8 columns]"
      ]
     },
     "execution_count": 102,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Stack the tables of all later pages and renumber their rows from 0.\n",
    "df_url_out = pd.concat(l_df).reset_index(drop=True)\n",
    "# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat is the\n",
    "# replacement. Both inputs keep their own index labels, exactly as append did.\n",
    "df_表格 = pd.concat([数据, df_url_out])\n",
    "df_表格"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 103,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>篇名</th>\n",
       "      <th>作者</th>\n",
       "      <th>刊名</th>\n",
       "      <th>发表时间</th>\n",
       "      <th>被引</th>\n",
       "      <th>下载</th>\n",
       "      <th>操作</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>“转发”行为的扩散与新媒体赋权——基于微博自闭症议题的社会网络分析</td>\n",
       "      <td>黄月琴; 黄宪成</td>\n",
       "      <td>新闻记者</td>\n",
       "      <td>2021-05-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>397</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>嬗变、冲突与重构：新媒体视域下的网络舆论</td>\n",
       "      <td>陈晓伟; 董烁</td>\n",
       "      <td>中国编辑</td>\n",
       "      <td>2021-05-10</td>\n",
       "      <td>NaN</td>\n",
       "      <td>226</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>新媒体时代图书网络营销矩阵建设实务研究</td>\n",
       "      <td>郑丽珠</td>\n",
       "      <td>出版广角</td>\n",
       "      <td>2021-04-30</td>\n",
       "      <td>NaN</td>\n",
       "      <td>57</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>嵌入基层治理：县级融媒体中心与基层网络政务服务的融合发展</td>\n",
       "      <td>谢新洲; 石林</td>\n",
       "      <td>传媒</td>\n",
       "      <td>2021-04-25</td>\n",
       "      <td>NaN</td>\n",
       "      <td>198</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>长城新媒体集团融合创新春晚形态——“河北网络春节云联欢”云端放异彩</td>\n",
       "      <td>李建; 田少华; 李遥</td>\n",
       "      <td>传媒</td>\n",
       "      <td>2021-04-25</td>\n",
       "      <td>NaN</td>\n",
       "      <td>18</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>845</th>\n",
       "      <td>896</td>\n",
       "      <td>新世纪的网络传播发展——“新世纪网络传播发展国际论坛”研讨会综述</td>\n",
       "      <td>王蕾</td>\n",
       "      <td>新闻记者</td>\n",
       "      <td>2001-06-05</td>\n",
       "      <td>NaN</td>\n",
       "      <td>132</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>846</th>\n",
       "      <td>897</td>\n",
       "      <td>论网络广播──网络广播现状和经营理念</td>\n",
       "      <td>杨叶青</td>\n",
       "      <td>现代传播-北京广播学院学报</td>\n",
       "      <td>2000-12-15</td>\n",
       "      <td>13.0</td>\n",
       "      <td>344</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>847</th>\n",
       "      <td>898</td>\n",
       "      <td>发展新媒体的若干思考——由世界网络所想到的</td>\n",
       "      <td>林涛</td>\n",
       "      <td>中国广播电视学刊</td>\n",
       "      <td>2000-11-25</td>\n",
       "      <td>1.0</td>\n",
       "      <td>76</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>848</th>\n",
       "      <td>899</td>\n",
       "      <td>网络时代的对话与交流——新媒体技术2000年报告会内容纪要</td>\n",
       "      <td>钟新</td>\n",
       "      <td>国际新闻界</td>\n",
       "      <td>2000-09-25</td>\n",
       "      <td>NaN</td>\n",
       "      <td>235</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>849</th>\n",
       "      <td>900</td>\n",
       "      <td>寻求机遇 再造辉煌——网络传播时代中国电视业的生存及发展探析</td>\n",
       "      <td>程莉</td>\n",
       "      <td>中国广播电视学刊</td>\n",
       "      <td>2000-07-25</td>\n",
       "      <td>1.0</td>\n",
       "      <td>52</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>900 rows × 8 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     Unnamed: 0                                 篇名           作者  \\\n",
       "0             1  “转发”行为的扩散与新媒体赋权——基于微博自闭症议题的社会网络分析     黄月琴; 黄宪成   \n",
       "1             2               嬗变、冲突与重构：新媒体视域下的网络舆论      陈晓伟; 董烁   \n",
       "2             3                新媒体时代图书网络营销矩阵建设实务研究          郑丽珠   \n",
       "3             4       嵌入基层治理：县级融媒体中心与基层网络政务服务的融合发展      谢新洲; 石林   \n",
       "4             5  长城新媒体集团融合创新春晚形态——“河北网络春节云联欢”云端放异彩  李建; 田少华; 李遥   \n",
       "..          ...                                ...          ...   \n",
       "845         896   新世纪的网络传播发展——“新世纪网络传播发展国际论坛”研讨会综述           王蕾   \n",
       "846         897                 论网络广播──网络广播现状和经营理念          杨叶青   \n",
       "847         898              发展新媒体的若干思考——由世界网络所想到的           林涛   \n",
       "848         899      网络时代的对话与交流——新媒体技术2000年报告会内容纪要           钟新   \n",
       "849         900     寻求机遇 再造辉煌——网络传播时代中国电视业的生存及发展探析           程莉   \n",
       "\n",
       "                刊名        发表时间    被引   下载  操作  \n",
       "0             新闻记者  2021-05-20   NaN  397  下载  \n",
       "1             中国编辑  2021-05-10   NaN  226  下载  \n",
       "2             出版广角  2021-04-30   NaN   57  下载  \n",
       "3               传媒  2021-04-25   NaN  198  下载  \n",
       "4               传媒  2021-04-25   NaN   18  下载  \n",
       "..             ...         ...   ...  ...  ..  \n",
       "845           新闻记者  2001-06-05   NaN  132  下载  \n",
       "846  现代传播-北京广播学院学报  2000-12-15  13.0  344  下载  \n",
       "847       中国广播电视学刊  2000-11-25   1.0   76  下载  \n",
       "848          国际新闻界  2000-09-25   NaN  235  下载  \n",
       "849       中国广播电视学刊  2000-07-25   1.0   52  下载  \n",
       "\n",
       "[900 rows x 8 columns]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Save the combined table to a local Excel workbook, then show it.\n",
    "df_表格.to_excel('cnki知网数据.xlsx', sheet_name=\"知网数据\", engine=\"openpyxl\")\n",
    "display(df_表格)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 导出refworks文件（.txt）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 104,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Go back to the first page by clicking the element with id \"total\".\n",
    "# click() returns None, so its result is not stored; find_element(\"id\", ...)\n",
    "# replaces find_element_by_id, which Selenium 4.3 removed.\n",
    "driver.find_element(\"id\", \"total\").click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 105,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'下一页'"
      ]
     },
     "execution_count": 105,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Look up the pager link again and display its innerHTML as a quick check.\n",
    "# find_element(\"id\", ...) replaces find_element_by_id, which Selenium 4.3 removed.\n",
    "element = driver.find_element(\"id\", \"PageNext\")\n",
    "element.get_attribute('innerHTML')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 106,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Notebook-level placeholders for the export step.\n",
    "导出_html = {}\n",
    "main_content_ = str()\n",
    "element = None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 107,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]\n"
     ]
    }
   ],
   "source": [
    "# A single download operation handles at most 500 articles, so the work is split into two batches.\n",
    "# First batch: labels 1..10.\n",
    "pages = [*range(1, 11)]\n",
    "print(pages)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 108,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Select all 50 entries on the current page, then turn the page.\n",
    "def xuanze(pages, base_delay=30):\n",
    "    \"\"\"Tick the select-all checkbox on the current page, then go to the next page.\n",
    "\n",
    "    Once per entry in ``pages``: click the element with id ``selectCheckAll1``,\n",
    "    click the element with id ``PageNext``, then pause between ``base_delay``\n",
    "    and ``base_delay + 10`` seconds. The last iteration also turns the page, so\n",
    "    the browser ends one page past the last selected one.\n",
    "\n",
    "    Args:\n",
    "        pages: iterable of labels; each is printed as progress and drives one iteration.\n",
    "        base_delay: minimum pause in seconds after each page turn\n",
    "            (default 30, the value that used to be hard-coded).\n",
    "    \"\"\"\n",
    "    for p in pages:\n",
    "        print(p, end='\\t')\n",
    "        # find_element(\"id\", ...) replaces find_element_by_id, which Selenium 4.3 removed.\n",
    "        driver.find_element(\"id\", \"selectCheckAll1\").click()\n",
    "        driver.find_element(\"id\", \"PageNext\").click()\n",
    "        time.sleep(base_delay + 10 * random())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 109,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1\t2\t3\t4\t5\t6\t7\t8\t9\t10\t"
     ]
    }
   ],
   "source": [
    "# Select and page through the first batch; each iteration sleeps, so this cell is slow.\n",
    "xuanze (pages)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 110,
   "metadata": {},
   "outputs": [],
   "source": [
    "# First RefWorks export: click the <i class=\"icon-d\"> element.\n",
    "# click() returns None, so its result is not stored; find_element(\"xpath\", ...)\n",
    "# replaces find_element_by_xpath, which Selenium 4.3 removed.\n",
    "driver.find_element(\"xpath\", '//i[@class=\"icon-d\"]').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 111,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Click the <i class=\"icon-r\"> element (find_element_by_xpath was removed in Selenium 4.3;\n",
    "# click() returns None, so nothing is assigned).\n",
    "driver.find_element(\"xpath\", '//i[@class=\"icon-r\"]').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 112,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Click the link whose exporttype attribute is \"Refworks\" (find_element_by_xpath was\n",
    "# removed in Selenium 4.3; click() returns None, so nothing is assigned).\n",
    "driver.find_element(\"xpath\", '//a[@exporttype=\"Refworks\"]').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 113,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['CDwindow-6E7CF300A0C8FC55231D3AB1F6AB0092',\n",
       " 'CDwindow-C9B916AC89729DA5886C030FD7059349',\n",
       " 'CDwindow-AF2DC351C039BBBF495CFD35D491ED87']"
      ]
     },
     "execution_count": 113,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# List the handles of all open browser windows.\n",
    "driver.window_handles"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 114,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<ipython-input-114-5e47a15b67dc>:2: DeprecationWarning: use driver.switch_to.window instead\n",
      "  driver.switch_to_window(driver.window_handles[2])\n"
     ]
    }
   ],
   "source": [
    "# Switch to the window at index 2 of driver.window_handles.\n",
    "# driver.switch_to.window is the supported call; switch_to_window is deprecated.\n",
    "driver.switch_to.window(driver.window_handles[2])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 115,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Export the txt file: click the <i class=\"icon icon-export\"> element.\n",
    "# click() returns None, so its result is not stored; find_element(\"xpath\", ...)\n",
    "# replaces find_element_by_xpath, which Selenium 4.3 removed.\n",
    "driver.find_element(\"xpath\", '//i[@class=\"icon icon-export\"]').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 116,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<ipython-input-116-2c997ac77236>:2: DeprecationWarning: use driver.switch_to.window instead\n",
      "  driver.switch_to_window(driver.window_handles[1])\n"
     ]
    }
   ],
   "source": [
    "# Switch to the window at index 1 of driver.window_handles.\n",
    "# driver.switch_to.window is the supported call; switch_to_window is deprecated.\n",
    "driver.switch_to.window(driver.window_handles[1])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 下载原文"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 117,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Click the <li class=\"bulkdownload export\"> element (find_element_by_xpath was removed\n",
    "# in Selenium 4.3; click() returns None, so nothing is assigned).\n",
    "driver.find_element(\"xpath\", '//li[@class=\"bulkdownload export\"]').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 133,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<ipython-input-133-5578cf30eca2>:2: DeprecationWarning: use driver.switch_to.window instead\n",
      "  driver.switch_to_window(driver.window_handles[3])\n"
     ]
    }
   ],
   "source": [
    "# Switch to the window at index 3 of driver.window_handles.\n",
    "# driver.switch_to.window is the supported call; switch_to_window is deprecated.\n",
    "driver.switch_to.window(driver.window_handles[3])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 119,
   "metadata": {},
   "outputs": [],
   "source": [
    "# First download: fetch the full text of the 500 selected articles.\n",
    "# click() returns None, so its result is not stored; find_element(\"id\", ...)\n",
    "# replaces find_element_by_id, which Selenium 4.3 removed.\n",
    "driver.find_element(\"id\", \"btn-download-all\").click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 120,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<ipython-input-120-2c997ac77236>:2: DeprecationWarning: use driver.switch_to.window instead\n",
      "  driver.switch_to_window(driver.window_handles[1])\n"
     ]
    }
   ],
   "source": [
    "# Switch to the window at index 1 of driver.window_handles.\n",
    "# driver.switch_to.window is the supported call; switch_to_window is deprecated.\n",
    "driver.switch_to.window(driver.window_handles[1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 121,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Clear the 500 selected articles by clicking a link inside the gridTable toolbar.\n",
    "# click() returns None, so its result is not stored; find_element(\"xpath\", ...)\n",
    "# replaces find_element_by_xpath, which Selenium 4.3 removed.\n",
    "driver.find_element(\"xpath\", '//*[@id=\"gridTable\"]/div[1]/div[2]/div[1]/a').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 122,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[11, 12, 13, 14, 15, 16, 17]\n"
     ]
    }
   ],
   "source": [
    "# Second batch: labels 11..17.\n",
    "# NOTE(review): the combined table has 900 rows at 50 per page, which suggests 18 pages;\n",
    "# the last click of this batch would land on page 18 without selecting it - confirm.\n",
    "pages = [*range(11, 18)]\n",
    "print(pages)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 123,
   "metadata": {},
   "outputs": [],
   "source": [
    "def xuanze(pages, base_delay=20):\n",
    "    \"\"\"Tick the select-all checkbox on the current page, then go to the next page.\n",
    "\n",
    "    Once per entry in ``pages``: click the element with id ``selectCheckAll1``,\n",
    "    click the element with id ``PageNext``, then pause between ``base_delay``\n",
    "    and ``base_delay + 10`` seconds. The last iteration also turns the page, so\n",
    "    the browser ends one page past the last selected one.\n",
    "\n",
    "    Args:\n",
    "        pages: iterable of labels; each is printed as progress and drives one iteration.\n",
    "        base_delay: minimum pause in seconds after each page turn\n",
    "            (default 20, the value that used to be hard-coded).\n",
    "    \"\"\"\n",
    "    for p in pages:\n",
    "        print(p, end='\\t')\n",
    "        # find_element(\"id\", ...) replaces find_element_by_id, which Selenium 4.3 removed.\n",
    "        driver.find_element(\"id\", \"selectCheckAll1\").click()\n",
    "        driver.find_element(\"id\", \"PageNext\").click()\n",
    "        time.sleep(base_delay + 10 * random())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 124,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "11\t12\t13\t14\t15\t16\t17\t"
     ]
    }
   ],
   "source": [
    "# Select and page through the second batch; each iteration sleeps, so this cell is slow.\n",
    "xuanze(pages)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 125,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Click the <i> element with class 'icon-d'.\n",
    "# NOTE(review): presumably opens the export drop-down menu - confirm.\n",
    "# click() returns None, so its result is not stored in a variable.\n",
    "icon_d = driver.find_element_by_xpath('//i[@class=\"icon-d\"]')\n",
    "icon_d.click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 126,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Click the <i> element with class 'icon-r'.\n",
    "# NOTE(review): presumably expands the export sub-menu - confirm.\n",
    "# click() returns None, so its result is not stored in a variable.\n",
    "icon_r = driver.find_element_by_xpath('//i[@class=\"icon-r\"]')\n",
    "icon_r.click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 127,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Click the link whose exporttype attribute is 'Refworks'.\n",
    "# NOTE(review): presumably opens the Refworks export page in a new window - confirm.\n",
    "# click() returns None, so its result is not stored in a variable.\n",
    "refworks_link = driver.find_element_by_xpath('//a[@exporttype=\"Refworks\"]')\n",
    "refworks_link.click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 128,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['CDwindow-6E7CF300A0C8FC55231D3AB1F6AB0092',\n",
       " 'CDwindow-C9B916AC89729DA5886C030FD7059349',\n",
       " 'CDwindow-AF2DC351C039BBBF495CFD35D491ED87',\n",
       " 'CDwindow-CC9C25249CCDE17CB27D008107AC85D7',\n",
       " 'CDwindow-7297CF9B5C3742FC947FF39754BC5ED7']"
      ]
     },
     "execution_count": 128,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the handles (ids) of all open browser windows.\n",
    "open_window_handles = driver.window_handles\n",
    "open_window_handles"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 129,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<ipython-input-129-22df7519d47a>:2: DeprecationWarning: use driver.switch_to.window instead\n",
      "  driver.switch_to_window(driver.window_handles[4])\n"
     ]
    }
   ],
   "source": [
    "# Switch to the window at index 4 of driver.window_handles.\n",
    "# driver.switch_to_window is deprecated (see the DeprecationWarning it emits);\n",
    "# driver.switch_to.window is the supported replacement.\n",
    "driver.switch_to.window(driver.window_handles[4])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 130,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Export as a txt file.\n",
    "# click() returns None, so its result is not stored in a variable.\n",
    "export_icon = driver.find_element_by_xpath('//i[@class=\"icon icon-export\"]')\n",
    "export_icon.click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 131,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<ipython-input-131-2c997ac77236>:2: DeprecationWarning: use driver.switch_to.window instead\n",
      "  driver.switch_to_window(driver.window_handles[1])\n"
     ]
    }
   ],
   "source": [
    "# Switch back to the window at index 1 of driver.window_handles.\n",
    "# driver.switch_to_window is deprecated (see the DeprecationWarning it emits);\n",
    "# driver.switch_to.window is the supported replacement.\n",
    "driver.switch_to.window(driver.window_handles[1])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 第二次批量下载"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 132,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Click the <li> element with class 'bulkdownload export' to start the second batch download.\n",
    "# click() returns None, so its result is not stored in a variable.\n",
    "bulk_download_item = driver.find_element_by_xpath('//li[@class=\"bulkdownload export\"]')\n",
    "bulk_download_item.click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<ipython-input-83-329a6c3737f9>:2: DeprecationWarning: use driver.switch_to.window instead\n",
      "  driver.switch_to_window(driver.window_handles[5])\n"
     ]
    }
   ],
   "source": [
    "# Switch to the window at index 5 of driver.window_handles.\n",
    "# NOTE(review): presumably the window opened by the bulk-download click - confirm.\n",
    "# driver.switch_to_window is deprecated (see the DeprecationWarning it emits);\n",
    "# driver.switch_to.window is the supported replacement.\n",
    "driver.switch_to.window(driver.window_handles[5])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Download the remaining 400 selected articles.\n",
    "# click() returns None, so its result is not stored in a variable.\n",
    "download_all_button = driver.find_element_by_id('btn-download-all')\n",
    "download_all_button.click()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
